home *** CD-ROM | disk | FTP | other *** search
- program Rsort; { (C) Copyright 1985 by Mark E Johnson }
- { 2272-F Benson Ave }
- { St. Paul, MN 55114 }
- { (612)-698-3686 }
-
-
- { RSORT is a file sort program that uses an n-way merge to sort files }
- { of virtually unlimited size. It does this by reading the input file }
- { in small chunks, sorting those records, and outputing them to a work }
- { file. The work file is managed by a table routine that knows where }
- { the beginning and end of each 'sub-file' is. When all the input }
- { records have been read and sorted to the work file, the individual }
- { sub-files are merged together in sequence and output to the destina- }
- { tion file. ( If you've played Solitaire then you know how this works ) }
-
- { An enhancement to this sort program is the idea of OWN-CODE routines. }
- { You can modify the record either as it's coming into the sort, or as }
- { it's being written to the output file. Descriptions of the own-code }
- { routines are detailed in the code. The routines follow the practice }
- { of NCR's SORT2 program running on an NCR 8450 in the 'N' or 'V' mode. }
-
- { This program has been developed in standard pascal and has been tested }
- { and used in Turbo Pascal. It is meant only for sorting random record }
- { files, although it is a trivial task to convert it to sort sequential }
- { files of variable length. Tests on a SLICER 80186 computer running }
- { Concurrent CP/M-86 show that it will sort .5 megabytes per minute. }
- { Don't expect the same performance from a IBM PC type machine. }
- { This time could be decreased by using a more efficient sort, or possi- }
- { bly by varying the number of records per pass that it processes. }
-
- { NOTE: Try to optimize the performance of this program by placing the }
- { TEMP work file on a separate drive. If you have a RAM-DISK, then this }
- { is a good opportunity to use it. you may place the source and destin- }
- { ation files on the same drive. Remember that you must have enough }
- { free space on your drives to accomodate three files of the same size }
- { as your source file (unless you write over the source, then it's two) }
-
- { ENDDAT should be a value that you expect will NEVER appear as a data }
- { item in your key field. It MUST evaluate the same type as your key. }
-
- { The following equates indicate the number of records to be sorted in }
- { each pass. Generally you should allocate 16K worth of buffer. If the }
- { records in the file are 256 bytes, then set passlen to 64. }
- { PASSRECS must be greater than Passlen. The maximum number of records }
- { in the sort file is determined by PASSLEN * SUBFILES. To increase or }
- { decrease the maximum # of records, change SUBFILES accordingly. }
-
-
- const
- TEMPFILE = 'temp.srt'; { name of sort work file }
- ENDDAT = 'zzzzz';
- PASSLEN = 100; { # of records per pass }
- PASSRECS = 101; { always PASSLEN+1 }
- SUBFILES = 15; { Max number of SUBFILES to merge }
-
-
- { The following record fields are the definition of a record in the }
- { file to be sorted. Insert the record declaration of the file you }
- { want to sort here. Change the name of the key field to KEY_ITEM. }
-
- type
- rectype = record
- Name_First : string[20];
- KEY_ITEM : string[20]; { was name_last }
- Phone : string[8];
- area_code : string[8];
- end;
-
- { END OF SORT RECORD DECLARATION }
- { nothing needs to be changed past here }
-
- infotype = record
- first : integer;
- next : integer;
- last : integer;
- end;
-
- var
- sortbuf : array[1..PASSRECS] of rectype;
-
- Done,Flag : boolean;
- X,R,Hold : integer;
-
- Sortndx : integer; { Sub-record number of Sortbuf }
- Filenum : integer; { Current or last subfile }
-
- Info : array[1..SUBFILES] of infotype;
-
- Totrecs : integer; { Total records in int file }
- I,K : integer;
- inkey,outkey : integer; { Input and output keys }
- EOFlag,quit : boolean;
- Ret_Code : Char; { Return code from Own Code routines }
- Infile,Temp,Outfile : File of rectype;
- inname,outname : string[20];
-
- label again, alldone;
-
- procedure Own_Code1; { Own code routine for input records }
-
- { This routine is called after each record is input before }
- { sorting. You may write code here to modify or delete the }
- { record before sorting. One common use of this routine is to }
- { compare the record for a type of field which you do not want }
- { in the sorted file. For example, if we are sorting a mailing }
- { list, we may not want any names from outside the USA. }
- { We could check the ZIP code and pass only those ZIPs that }
- { indicate an address inside Continental USA. }
-
- { CALLED FROM: MAIN }
- { PARAMETERS : current record is in SORTBUF[SORTNDX] }
-
- { Returns "RET_CODE" Which may be one of the following values: }
- { D - Delete this record (Throw it away) }
- { K - Keep this record }
-
- begin
- Ret_Code:='K';
- End;
-
- procedure Own_Code2; { Own code routine for output records }
-
- { This routine is called before a sorted record is output to }
- { the destination file. One common use of this routine may }
- { be to eleminate any duplicate records, or convert ASCII to }
- { EBCDIC, upper to lower case, or more involved operations }
- { such as filling in certain fields based on calculated }
- { results from other fields. }
-
- { CALLED FROM: OUTPUT }
- { PARAMETERS : current record is in SORTBUF[HOLD] }
- { RETURNS : "RET_CODE" Which may be one of the following values: }
- { D - Delete this record (Throw it away) }
- { K - Keep this record }
-
- begin
- Ret_Code:='K'; { for now, Always keep current record }
- End;
-
- procedure init;
- begin
- writeln('Enter input file name ');
- readln(inname);
- writeln('Enter output file name ');
- readln(outname);
- end;
-
- procedure Getinp;
-
- { this procedure reads a record from the input file and }
- { stores it in SORTBUF[SORTNDX] }
-
- { CALLED FROM: MAIN }
- { RETURNS : new record is in SORTBUF[SORTNDX] }
-
- begin
- if eof(infile) then
- EOFLAG:=true
- else
- begin
- seek(infile,inkey);
- read(infile,sortbuf[sortndx]);
- inkey:=inkey+1;
- end;
-
- End;
-
- procedure Puttemp; { Write record to temp file }
- { This procedure writes the record in SORTBUF[I] to the work file }
-
- { CALLED FROM: MAIN }
- { PARAMETERS : current record is in SORTBUF[I] }
-
- begin
- seek(temp,k);
- write(temp,sortbuf[i]);
- K:=K+1;
-
- End;
-
- procedure Output;
- { This procedure writes a record to the destination file }
-
- { CALLED FROM: MERGE }
- { PARAMETERS : current record is in SORTBUF[HOLD] }
- { CALLS : OWN_CODE2 }
-
- begin
- Own_Code2;
- If Ret_code = 'K' Then
- begin
- seek(outfile,outkey);
- write(outfile,sortbuf[hold]);
- outkey:=outkey+1;
- end;
- if info[hold].next <= info[hold].last then
- begin
- seek(temp,info[hold].next);
- read(temp,sortbuf[hold]);
- info[hold].next:=info[hold].next+1;
- if eof(temp) then
- sortbuf[hold].KEY_ITEM:=ENDDAT;
- End
- Else
- sortbuf[hold].KEY_ITEM:=ENDDAT;
- r:=hold+1;
- End;
-
- procedure Sort; { Bubble sort }
- { this routine sorts the record array SORTBUF[1..SORTNDX] in ascending }
- { order, using KEY_ITEM as the sort key }
-
- { CALLED FROM: MAIN }
- { PARAMETERS : SORTBUF[1..SORTNDX] }
-
-
- var
- C : rectype; { hold area for swapping }
- I : integer;
- re_iter : boolean;
-
- begin
- re_iter:=TRUE;
- while re_iter=TRUE
- begin
- re_iter:=FALSE;
- for i:=1 to sortndx-1 do
- begin
- If Sortbuf[i].KEY_ITEM > Sortbuf[i+1].KEY_ITEM Then
- begin
- C:=sortbuf[i];
- sortbuf[i]:=sortbuf[i+1];
- sortbuf[i+1]:=c;
- re_iter:=TRUE;
- end;
- end;
- end;
- End;
-
- procedure Merge;
- { This procedure merges the subfiles in the workfile, and creates }
- { the destination file. }
-
- { CALLED FROM: MAIN }
- { PARAMETERS : INFO[1..FILENUM] contains the start and end record }
- { for each subfile in file TEMP. }
- { CALLS : OUTPUT }
-
- var
- J : integer;
- i : integer;
- begin
- assign(temp,TEMPFILE);
- reset(temp);
- for i:=1 to filenum do
- begin
- If info[i].First >= 0 Then
- begin
- seek(temp,info[i].first);
- read(temp,sortbuf[i]);
- info[i].Next:=info[i].First+1;
- End;
- end;
- writeln('Performing Merge');
- Done:=FALSE;
- while done=FALSE Do
- begin
- r:=1;
- hold:=r;
- if r=hold Then r:=r+1;
- if r > PASSLEN then r:=1;
- if r=hold then
- writeln('Internal error, R=HOLD = ',hold);
- for i:=1 to PASSRECS-1 do { Filenum-1 }
- begin
- if sortbuf[hold].KEY_ITEM <= sortbuf[r].KEY_ITEM Then
- begin
- Flag:=TRUE;
- r:=r+1;
- end
- Else
- begin
- flag:=FALSE;
- hold:=r;
- r:=r+1;
- end;
- if r > filenum then
- r:=1;
- end;
- if flag=TRUE then
- output;
- done:=TRUE;
- for j:=1 to filenum do
- begin
- if sortbuf[j].KEY_ITEM < ENDDAT then
- Done:=FALSE;
- end;
- End;
- Close(Outfile);
- Close(Temp);
-
- End;
-
- begin { MAIN }
- { This is the main program. It starts by building the TEMP file, then }
- { calling the procedure MERGE. }
-
- { CALLS : INIT, GETINP, OWN_CODE1, SORT, MERGE }
-
- init;
- assign(infile,inname);
- reset(infile);
- assign(temp,TEMPFILE);
- rewrite(temp);
- assign(outfile,outname);
- rewrite(outfile);
-
- EOFlag:=FALSE;
- Quit:=FALSE;
- Sortndx:=1;
- Filenum:=1;
- Info[1].First:=0;
- inkey:=0; { Starting key for input file }
- outkey:=0; { Starting key for output file }
- K:=0; { Starting key for Temp file }
-
- totrecs:=1;
- while quit = FALSE do
- begin
- Again:
- Getinp; { get a record }
- Own_code1;
- if ret_code='D' Then
- Goto Again;
- Sortndx:=Sortndx+1;
- If (Sortndx > PASSLEN) or (EOFlag=TRUE) Then { Buffer overflow }
- begin
- Sortndx:=Sortndx-1;
- If EOFlag=TRUE Then
- begin
- Totrecs:=Totrecs-1;
- quit:=TRUE;
- end;
- Sort; { Sort buffer }
- if sortndx = 0 then
- goto alldone;
- writeln('Writing Subfile ',Filenum);
- for I:=1 to Sortndx do { Write to temp file }
- Puttemp;
- Info[Filenum].Last:=Totrecs-1; { Save last record number }
- Filenum:=Filenum+1; { Start new subfile }
- Info[Filenum].First:=Totrecs; { Save starting record }
- Sortndx:=1; { Reset sort buffer index }
- end;
- totrecs:=totrecs+1;
- end;
- alldone:
- info[filenum].last:=totrecs;
- filenum:=filenum-1;
- writeln('Total records input = ',totrecs-1);
- Close(Infile);
- Close(Temp);
- Merge;
- writeln('Total records merged: ',outkey);
- End.